org 100h

; Entry: point DS and FS at the two 64 KB lookup-table segments.
  push 0x8000
  pop ds      ; table: cos
  push 0x7000
  pop fs      ; table: 1/cos (each entry is stored as 2/cos, see COS_TAB)
;  push 0x4000
;  pop gs      ; backbuffer 256x256, 2 bytes/pixel
  xor bp,bp   ; bp=0: the [bp+...] operands below are then plain offsets,
              ; addressed through SS because DS now points at the cos table

;Cos table with 16384 entries
; The word at [bp+di] serves as the angle counter and runs through all
; 65536 values. bx = angle*4 is truncated to 16 bits, so only 16384 dword
; slots exist and every slot is written four times (the last write wins).
; NOTE(review): relies on di=0xFFFE and on a zero word at ss:0xFFFE at
; entry (the usual DOS .COM start-up state) - nothing in this file sets them.
  fninit
COS_TAB:
  imul bx,[bp+di],4 ; bx=[ss:bp+di]=[ss:-2]=angle (0 on init), times 4 = byte offset
  fild word[bp+di]  ; angle, loaded as a signed word
  fidiv word[bp+C16K_DIV_2PI] ; angle/2608, i.e. roughly angle*2pi/16384
  fcos           ;; cos(angle*2pi/16384) = cos(bx*2pi/65536): period is 16384 entries
  fst dword[bx]  ; store into the cos table (ds:bx), value stays on the FPU stack
  fidivr word[bp+C2] ;; 2/cos(...)
  fstp dword[fs:bx]  ; store into the 2/cos table (fs:bx) and pop
  inc word[bp+di] ; next angle
  jnz COS_TAB     ; repeat until the counter wraps back to 0
                  ; NOTE(review): the original note here said "bx=4"; the last
                  ; imul appears to leave bx=0xFFFC - TODO confirm what was meant


  ; Select the graphics mode, then set up bp as the base for all variable access.
  mov ax,0x4f02   ; int 10h function number (VESA "set mode" - per the mode comment below)
  mov bx,0x117    ; mode number
  int 10h       ; 1024x768 16-bit mode

  lea bp,[bx+si]  ; bp = 0x117 + si. The macros below subtract 0x217, so this
                  ; assumes si=0x100 here (presumably the DOS .COM entry value,
                  ; it is not set in this file) and that int 10h preserved bx/si
; d(xx)/w(xx): dword/word access to variable xx through bp (hence SS), using
; a forced 8-bit displacement. Every variable must therefore lie within
; -128..+127 bytes of offset 0x217; growing the code can break this.
%define d(xx) dword[byte bp+xx-0x217]
%define w(xx) word[byte bp+xx-0x217]


M:
; Start of one frame: advance the three phase counters T, T2, T3 and derive
; the rotation/scale coefficients C and S used by the pixel loop.
; All counters are byte offsets into the tables (multiples of 4), so one
; radian corresponds to 65536/2pi bytes.
; The commented-out lines below are alternative step sizes kept by the author.
;  add w(T),652*4
;  add w(T2),403*4  ; (sqrt(5) - 1) / 2
;  add w(T3),922*4  ; sqrt(2)

;  add w(T),653*4    ; 1/16 * 65536/2pi
;  add w(T2),452*4   ; 1/16 * 65536/2pi * 0.693147180559945309417232121458177 ln(2)
;  add w(T3),942*4   ; 1/16 * 65536/2pi * 1.44269504088896340735992468100189  1/ln(2)

  add w(T),163*4    ; 1/16 * 65536/2pi
  add w(T2),113*4   ; 1/16 * 65536/2pi * 0.693147180559945309417232121458177 ln(2)
  add w(T3),235*4   ; 1/16 * 65536/2pi * 1.44269504088896340735992468100189  1/ln(2)

  mov bx,w(T2)          ; bx = t2 as byte offset into the cos table
  fld dword[bx]         ;; cos(t2)
  fidiv w(C10)          ;; cos(t2)/10
  fld1
  fsubrp st1,st0        ;; scale=1-cos(t2)/10
  fld st0               ;; scale scale

  mov bx,w(T)           ; bx = t as byte offset into the cos table
  fmul dword[bx+0x4000] ;; S=sin(t)*scale scale
                        ; (read from the cos table 0x4000 bytes = a quarter
                        ;  period ahead of t; the author labels this sin(t))
  fstp d(S)             ; save S, leaving scale on the stack
  fmul dword[bx]        ;; C=cos(t)*scale
  fstp d(C)             ; save C; FPU stack is empty again


; Compute 256x256 pixels to backbuffer
; di is the pixel index: high byte = row, low byte = column. The loop that
; starts at P runs until di wraps back to 0 (see the inc di / jnz P at its end).
; [bp+si] is used throughout as a scratch memory cell for integer<->FPU moves.
  xor di,di  ; di=pixel coords (Y:X)
P:
  fldz
  fldz
  fldz
  fldz       ; K=0 R=0 G=0 B=0

  mov ax,di
  shr ax,8           ; ax = row (high byte of di)
  mov [bp+si],ax
  fild word[bp+si]
  fidiv w(C192) ;; y[0..1] K R G B
                ; (range is the author's note; the raw quotient is row/192)

  mov ax,di
  mov ah,0           ; ax = column (low byte of di)
  mov [bp+si],ax
  fild word[bp+si]
  fidiv w(C192) ;; x[0..1.33] y[0..1] K R G B

  stc                ; LEN needs CF=1 on entry so that it squares both x and y
  call LEN           ;; r x y K R G B, r = sqrt(x*x+y*y) scaled by C16K_DIV_2PI
  fmul d(CHALF)      ;; r/2 x y K R G B
  fistp dword[bp+si] ; ax = d = length(x,y)/2 as cos_index
  imul ax,[bp+si],4  ; ax = low word of the rounded value, times 4 = byte offset
  mov cl,16     ; cx = i
                ; Only cl is written; `loop` counts with all of cx, so this
                ; relies on ch=0. The colour loop further down leaves cx=0,
                ; which covers every pixel after the first of a frame.
                ; NOTE(review): the copy loop exits with cx=0x200, so on frames
                ; after the first, the first pixel appears to run 0x210
                ; iterations - confirm whether that is acceptable.
I:
; One iteration: rotate/scale (x,y), fold into a unit cell, then accumulate
; an intensity k and its colour contributions.
; rotate and scale
  ;[x] = [C -S]*[x]
  ;[y]   [S  C] [y]
  ; The R body runs twice, steered by CF: stc, then cmc clears CF on the
  ; first pass (jnc loops back) and sets it on the second (fall through).
  ; Stack comments: first pass | second pass.
  stc
R fld st1         ;; y x y K R G B    | x Sy x Cy K R G B
  fmul d(C)       ;; Cy x y K R G B   | Cx Sy x Cy K R G B
  fxch st2        ;; y x Cy K R G B   | x Sy Cx Cy K R G B
  fmul d(S)       ;; Sy x Cy K R G B  | Sx Sy Cx Cy K R G B
  cmc
  jnc R
  faddp st3,st0  ;; Sy Cx Sx+Cy K R G B
  fsubp st1,st0  ;; x=Cx-Sy y=Sx+Cy K R G B

; square fold for now
  ; Same CF-driven two-pass trick (CF is still 1 from the R loop): each pass
  ; handles st0 and then swaps, so x and y both get folded and end in order.
F fsub d(CHALF) ;; x=x-0.5 y=y-0.5 K R G B
  fist dword[bp+si]   ; rounded value to scratch (no pop)
  fisub dword[bp+si]  ; subtract it: keeps only the distance to the nearest integer
  fxch st1
  cmc
  jnc F         ;; x=x-round(x) y=y-round(y) K R G B

; interfering concentric circles
  call LEN            ; CF=1 here (left by the F loop), as LEN requires
  fistp dword[bp+si]  ; scaled length as integer in scratch; stack back to x y K R G B
  imul bx,[bp+si],5*4 ; 65536/2pi * (5*length(x,y)
  add bx,ax           ;              + d
  sub bx,w(T3)        ;              - t3)
  fld dword[fs:bx]    ;; k=2/cos(5*length(x,y) + d - t3) x y K R G B
  fadd st3,st0        ;; k x y K+=k R G B

; RGB += k * ( 0.5 + cos(3*(t2 - d + i/100) + [2 1 0]) );
; (the 0.5*k term is not added here; K accumulates k and is applied after the loop)
  imul bx,cx,26*4     ; bx = q = 65536/2pi * (i/100
  sub bx,ax           ;                       - d
  add bx,w(T2)        ;                       + t2
  imul bx,3           ;                      ) * 3

  fld dword[bx] ;; cos(q) k x y K R G B
  fmul st1      ;; k*cos(q) k x y K R G B
  faddp st7,st0 ;; k x y K R G B+=k*cos(q)

  fld dword[bx+2608*4] ;; cos(q+1) k x y K R G B  (2608 entries = 1 radian)
  fmul st1             ;; k*cos(q+1) k x y K R G B
  faddp st6,st0        ;; k x y K R G+=k*cos(q+1) B

  fmul dword[bx+2608*4*2] ;; k*cos(q+2) x y K R G B
  faddp st4,st0 ;; x y K R G B   (R+=k*cos(q+2))

  loop I        ; dec cx, repeat while cx != 0

  ; Finish the pixel: drop x/y, add K/2 to each channel, then square, clamp
  ; and pack the three channels into one 16-bit colour in ax.
  fcompp        ;; K R G B      (used only to pop x and y)
  fmul d(CHALF) ;; K/=2 R G B
  fadd st1,st0
  fadd st2,st0
  faddp st3,st0 ;; R+=K G+=K B+=K

; RGB = RGB*RGB/256;  // square the sum for better contrast

  mov cl,3         ; three channels; cx is 0 after the preceding loop, so cx=3
COL:
  fmul st0         ; square the channel on top of the stack (R, then G, then B)
  fistp word[bp+si] ; to integer via the scratch cell, pop
  mov bx,[bp+si]
  cmp bh,31        ; bh = value/256; unsigned compare
  jb NO_CLAMP
  mov bh,31        ; clamp to the 5-bit maximum
NO_CLAMP:
  shl ax,5         ; make room for 5 more bits (old contents of ax get shifted out)
  add al,bh        ; .rrr|rrgg|gggb|bbbb
  loop COL
  shl ax,1         ; rrrr|rggg|ggbb|bbb.
  sub al,bh        ; rrrr|rggg|gg.b|bbbb
                   ; (bh still holds blue: 2*b - b = b, so blue lands in bits 0-4
                   ;  and the lowest green bit stays 0)

  ; Store the colour in ax to the backbuffer: 2 bytes per pixel, so the
  ; 65536 pixels span two segments, 0x4000 and 0x5000.
  push di          ; keep the pixel index
  add di,di        ; di = byte offset; CF = top bit of the pixel index
  mov bx,0x4000    ; (mov leaves CF untouched)
  jnc DINZ
  mov bh,0x50      ; upper half of the pixels: bx = 0x5000
DINZ:
  mov es,bx
  stosw            ; [es:di] = ax
  pop di           ; restore the pixel index (discarding stosw's change to di)

  inc di
  jnz P            ; next pixel until di wraps to 0

; Copy from backbuffer.
; Fills the 1024x768 screen from the 256x256 backbuffer. Screen coordinates
; are taken relative to the centre, mirrored into non-negative values and
; halved, so each backbuffer pixel is shown enlarged and reflected.
; di is 0 on entry (the pixel loop ends when di wraps to 0) and advances
; 2 bytes per pixel: 2048 bytes per line, i.e. one 64 KB window per 32 lines.
; NOTE(review): movsw is assumed to run with the direction flag clear.

  push si          ; si is reused as the source offset below; [bp+si] needs it back
  
  push 0xa000
  pop es           ; es = video memory window
  xor bx,bx  ; y: 0..767
Y:
  push bx
  mov dx,bx
  shr dx,5       ; page: 0..23
  mov ax,0x4f05  ; each line: set window, assume 64kB granularity
  xor bx,bx      ; bh=0 bl=window=0 dx=page
  int 10h
  pop bx

  mov cx,-512  ; x: -512..511
X:
  lea ax,[bx-384]  ; -384..383
  mov dx,cx        ; -512..511
  
  test dx,dx
  jns XPOS
  not ax           ; left half: reflect both coordinates (v -> -1-v)
  not dx
XPOS:

  test ax,ax
  jns YPOS
  not ax           ; vertical coordinate still negative: reflect it ...
  xchg ax,dx       ; ... and swap the two coordinates
YPOS:

  shr ax,1         ; halve both (now non-negative) coordinates
  shr dx,1
  mov dh,al        ; dx = backbuffer pixel index (row in dh, column in dl)
  
  mov ax,0x4000>>13
  add dx,dx        ; dx = byte offset; CF = top bit of the index
  rcl ax,13        ; ax = 0x4000, or 0x5000 when CF was set
  mov gs,ax        ; gs = backbuffer segment holding this pixel
  mov si,dx
  
  gs movsw         ; copy one pixel: [es:di] = [gs:si]

  inc cx
  cmp ch,2         ; stop when cx reaches 0x200 (= 512)
  jne X

  inc bx
  cmp bh,3         ; stop when bx reaches 0x300 (= 768)
  jne Y

  pop si

  ; End of frame: read a byte from port 60h and loop unless it is 1.
  in al,60h ; ESC check
  cmp al,1
  jne M     ; anything else: render the next frame

  mov ax,3 ; text mode
  int 10h
  ret      ; NOTE(review): returns through the word on top of the stack -
           ; presumably the zero word a DOS .COM program starts with (the same
           ; cell COS_TAB counted in and left at 0) - TODO confirm

LEN: ;; x y -> r=sqrt(x*x+y*y)*16384/2pi x y
; In:  st0=x, st1=y; CF must be 1 (both callers arrive with CF set).
;      NOTE: the original comment said "assume cf=0", but with CF=0 the
;      loop below runs only once and the result is wrong.
; Out: r pushed on top of x and y, which are left in place.
;      CF=1 on return. Uses two extra FPU stack slots while running.
  fld st1         ;; y x y        | x y*y x y
  fmul st0        ;; y*y x y      | x*x y*y x y
  cmc             ; pass 1: CF 1->0 (loop once more), pass 2: CF 0->1 (exit)
  jnc LEN
  faddp st1,st0   ;; x*x+y*y x y
  fsqrt
  fimul w(C16K_DIV_2PI) ; scale to table-index units (2608 per unit length)
  ret

; Constants and variables. In the main loop they are reached through the
; w()/d() macros (8-bit displacement from bp), so their distance from offset
; 0x217 must stay within a signed byte.
C2    dw 2     ; numerator of the 2/cos table entries
C10   dw 10    ; divisor in scale = 1 - cos(t2)/10
C192  dw 192   ; divisor turning pixel row/column into y/x
C16K_DIV_2PI dw 2608 ; 16384/2pi

CHALF dd 0.5

; Phase counters, advanced once per frame at M (byte offsets into the tables)
T dw 0
T2 dw 0
T3 dw 0

section .bss

;T resw 1
;T2 resw 1
;T3 resw 1

; Rotation/scale coefficients, rewritten every frame at M
C resd 1
S resd 1